#####################
# Sleep Project - Pedro Bessone, Gautam Rao, Heather Schofield, Frank Schilbach, and Mattie Toma
# Purpose: Provides adjusted p-values for table 7 from the Appendix (Main Treatment Effects, 
#          Fully-Disaggregated and Including Multiple Hypothesis Testing Corrections). Takes both the
#          unadjusted p-values from table_A7.do and the RI p-values from table_A7_RI.do, performs the
#          step-down procedure, and exports the adjusted p-values.
# Last edited: 07 May 2021
#####################

rm(list = ls())

options(scipen = 99)

require(rio)
require(tidyverse)

# Load function that performs step-down adjustment 
source("Scripts/adjust_pval_step_down_fun.R")

# Load actual p-values
df.p = import("Datasets/pvals_unadjusted_table_interaction.csv") %>% as_tibble()
colnames(df.p) = str_remove(colnames(df.p), pattern = "1")

# Divide them into the families that we want to correct p-values
df.family = df.p %>% 
  filter( var_name %in%  c("earnings", "wellbeing", "cogindex", "pref") )

df.work = df.p %>% 
  filter( var_name %in%  c("prod", "typing", "output"))

df.wb = df.p %>% 
  filter( var_name %in%  c("physical", "psych"))

df.cog = df.p %>% 
  filter( var_name %in%  c("cogfunction", "attention"))

df.pref = df.p %>% 
  filter( var_name %in%  c("timepref", "social", "risk"))

# Load RI p-vals/t-stats
df.ri.pvals.raw = import("Datasets/mht_ri_pvals_table_disag.dta") %>% as_tibble()

# Keep only pvals and t-stats
df.ri.pvals = df.ri.pvals.raw %>% 
  select(contains(c("tstat_", "p_"))) %>% 
  select(!contains(c("break", "work")))

# Transform t-stats into p-vals
t_to_pval = function(t){
  pval = 2*pnorm(-abs(t))
}

df.ri.pvals = df.ri.pvals %>% 
  mutate(across(.cols = contains("tstat_"), .fns = t_to_pval))

# Rename t_stats to p_vals 
colnames(df.ri.pvals)[str_detect(colnames(df.ri.pvals), "tstat_")] = 
  str_replace(colnames(df.ri.pvals)[str_detect(colnames(df.ri.pvals), "tstat_")], pattern = "tstat_", replacement = "p_")

# Divide them into the families that we want to correct p-values
df.family.RI = df.ri.pvals %>% 
  select(contains(c("p_earnings", "p_wellbeing", "p_cogindex", "p_pref")))

df.work.RI = df.ri.pvals %>% 
  select(contains(c("prod", "typing", "output")))

df.wb.RI = df.ri.pvals %>% 
  select(contains(c("physical", "psych")))

df.cog.RI = df.ri.pvals %>% 
  select(contains(c("cogfunction", "attention")))

df.pref.RI = df.ri.pvals %>% 
  select(contains(c("timepref", "social", "risk")))

# Function to get p-vals for different comparisons
get_adjust_pvals = function(df.original, df.RI){
  ### Correct family-wise outcomes ####
  # Encouragement vs. Control
  p_val_original = df.original$p_val_s
  p_val_RI_mat   = df.RI %>% select(!contains(c("int", "nap", "s_i"))) %>% as.matrix
  p_val_mht_s    = adjust_pval_step_down_fun(p_val_original, p_val_RI_mat)
  
  # Incentive vs. Control
  p_val_original = df.original$p_val_s_i
  p_val_RI_mat   = df.RI %>% select(contains("s_i")) %>% select(!contains(c("int"))) %>% as.matrix
  p_val_mht_s_i    = adjust_pval_step_down_fun(p_val_original, p_val_RI_mat)
  
  # Nap vs. Control
  p_val_original = df.original$p_val_nap
  p_val_RI_mat   = df.RI %>% select(contains("nap")) %>% select(!contains(c("int"))) %>% as.matrix
  p_val_mht_nap   = adjust_pval_step_down_fun(p_val_original, p_val_RI_mat)

  # Encouragement + Nap vs. Control
  p_val_original = df.original$p_val_int_s
  p_val_RI_mat   = df.RI %>% select(contains("int")) %>% select(!contains(c("_s_i"))) %>% as.matrix
  p_val_mht_int_s    = adjust_pval_step_down_fun(p_val_original, p_val_RI_mat)
  
  # Incentive vs. Control
  p_val_original = df.original$p_val_int_s_i
  p_val_RI_mat   = df.RI %>% select(contains("s_i")) %>% select(contains(c("int"))) %>% as.matrix
  p_val_mht_int_s_i    = adjust_pval_step_down_fun(p_val_original, p_val_RI_mat)
  
  df.res = tibble(
    treatment = rep(c("Encouragement","Incentive", "Nap", "Encouragement + Nap","Incentive + Nap"),rep(nrow(df.original),5)),
    outcome = rep(df.original$var_name, 5),
    p_val_original = c(df.original$p_val_s, df.original$p_val_s_i, df.original$p_val_nap, df.original$p_val_int_s, df.original$p_val_int_s_i),
    p_val_adj = c(p_val_mht_s, p_val_mht_s_i, p_val_mht_nap, p_val_mht_int_s, p_val_mht_int_s_i)
    )
  
  return(df.res)
  
}

# Correction across family-wide outcomes
df.pval.adj.family = get_adjust_pvals(df.family, df.family.RI)
# Correction across work outcomes
df.pval.adj.work = get_adjust_pvals(df.work, df.work.RI)
# Correction across well-being outcomes
df.pval.adj.wb = get_adjust_pvals(df.wb, df.wb.RI)
# Correction across cognition outcomes
df.pval.adj.cog = get_adjust_pvals(df.cog, df.cog.RI)
# Correction across family-wide outcomes
df.pval.adj.pref = get_adjust_pvals(df.pref, df.pref.RI)

df.pval.all = bind_rows(
  df.pval.adj.family,
  df.pval.adj.work,
  df.pval.adj.wb,
  df.pval.adj.cog,
  df.pval.adj.pref
  )

export(df.pval.all, "Datasets/pval_adj_table_disag.csv")





